Graph Neural Networks (GNN)
Table of Contents
| Vertex | Edge | Type |
|---|---|---|
| People | like each other | undirected |
| People | is the boss of | directed |
| Tasks | cannot be processed at the same time | undirected |
| Computers | have a direct network connection | undirected |
| Airports | planes fly between them | directed |
| City | can travel between them | directed |
Undirected Graph vs. Directed Graph
Weighted Graph
Graph and Adjacency Matrix
# !pip install networkx
import networkx as nx
import matplotlib.pyplot as plt
%matplotlib inline
Graph.add_edge
# Build a small undirected graph one edge at a time
# (Graph.add_edge creates missing endpoints implicitly).
g = nx.Graph()
for u, v in [('a', 'b'), ('b', 'c'), ('a', 'c'), ('c', 'd')]:
    g.add_edge(u, v)
# Quick render with an automatic layout and no labels.
nx.draw(g)
plt.show()
# Draw the same graph again, this time with node labels:
# compute one spring layout and reuse it for nodes, edges, and labels.
layout = nx.spring_layout(g)
for draw_part in (nx.draw_networkx_nodes, nx.draw_networkx_edges, nx.draw_networkx_labels):
    draw_part(g, layout)
plt.axis('off')
plt.show()
Graph.add_nodes_from
Graph.add_edges_from
# Build a 4-node graph from explicit node and edge lists.
G = nx.Graph()
G.add_nodes_from(range(1, 5))
G.add_edges_from([(1, 2), (1, 3), (2, 3), (3, 4)])
# Plot with labeled nodes.
pos = nx.spring_layout(G)
nx.draw(G, pos, node_size = 500)
nx.draw_networkx_labels(G, pos, font_size = 10)
plt.show()
# Basic statistics.
print(G.number_of_nodes())
print(G.number_of_edges())
G.nodes()
G.edges()
# Sparse adjacency matrix and its dense form.
A = nx.adjacency_matrix(G)
print(A)
print(A.todense())
Degree of Undirected Graph
$$ d_i = \sum_{j=1}^{n} \; A_{ij} $$
$$D = \text{diag}\{d_1, d_2, \cdots \}$$
Some nodes have many edges, but some don't
Adding $I$ is to add self-connecting edges
Considering neighboring nodes in the normalized weights
Normalization prevents numerical instabilities and vanishing/exploding gradients, so that the model can converge
1) (First attempt) Normalized $\tilde A$
$$\tilde D^{-1}(A+I)$$
2) Normalized $\tilde A$
$$\tilde A = \tilde D^{-1/2}(A+I) \tilde D^{-1/2}$$
Now we consider a feature matrix $H$
import numpy as np

# Toy 4-node adjacency matrix (undirected, so symmetric).
A = np.array([[0, 1, 1, 1],
              [1, 0, 0, 0],
              [1, 0, 0, 1],
              [1, 0, 1, 0]])

# Add self-loops: A + I.
A_self = A + np.eye(4)
print(A_self)

# Diagonal degree matrix of the self-loop graph.
D = np.diag(A_self.sum(axis=1))
print(D)
1) (First attempt) Normalized $\tilde A$
$$\tilde D^{-1}(A+I)$$
A_norm = np.linalg.inv(D).dot(A_self)
print(A_norm)
2) Normalized $\tilde A$
$$\tilde A = \tilde D^{-1/2}(A+I) \tilde D^{-1/2}$$
Now it is symmetric.
(Skip the details)
# Symmetric normalization: D^{-1/2} (A + I) D^{-1/2}.
D = np.array(A_self.sum(1))
D_half_norm = np.asmatrix(np.diag(np.power(D, -0.5).flatten()))
A_self = np.asmatrix(A_self)
A_half_norm = D_half_norm * A_self * D_half_norm
print(A_half_norm)
Convolution Layer
Weight Sharing
1) Message Aggregation from Local Neighborhood
2) Update
Adding a non-linear function: $k^{\text{th}}$ layer
$$ \begin{align*} H^{(k+1)} &= f \left( A, H^{(k)} \right) \\ & = \sigma \left( A H^{(k)} \, W \right) \end{align*} $$
1) Message Passing with Self-Loops
2) Neighborhood Normalization
Finally Graph Convolutional Networks
import networkx as nx
import matplotlib.pyplot as plt
%matplotlib inline
# Two triangles (1,2,3) and (4,5,6), bridged by the edge (1,4).
G = nx.Graph()
G.add_nodes_from(range(1, 7))
G.add_edges_from([(1, 2), (1, 3), (2, 3), (1, 4), (4, 5), (4, 6), (5, 6)])
nx.draw(G, with_labels = True)
plt.show()
# Dense adjacency matrix in node order 1..6.
A = nx.adjacency_matrix(G).todense()
print(A)
Assign feature vector H, so that it can be separated into two groups
# Column feature vector: +1 marks one group, -1 the other, 0 elsewhere.
H = np.matrix([[1], [0], [0], [-1], [0], [0]])
print(H)
The product of the adjacency matrix and the node feature matrix gives, for each node, the sum of its neighbors' features
# A·H sums, for each node, the features of its neighbors
# (notebook displays the resulting column vector).
A*H
# Add self-loops so each node also aggregates its own feature.
A_self = A + np.eye(6)
print(A_self)
Similar to data pre-processing for any Neural Networks operation, normalize the features to prevent numerical instabilities and vanishing / exploding gradients in order for the model to converge
# Row sums = node degrees of the self-loop graph.
D = np.array(A_self.sum(1))
print(D)
# Symmetric normalization: D^{-1/2} (A + I) D^{-1/2}.
D_half_norm = np.asmatrix(np.diag(np.power(D, -0.5).flatten()))
A_self = np.asmatrix(A_self)
A_half_norm = D_half_norm * A_self * D_half_norm
print(A_half_norm)
# Normalized neighborhood aggregation of H (notebook displays it).
A_half_norm*H
Build 2-layer GCN using ReLU as the activation function
# Fix the RNG so the randomly initialized weights are reproducible.
np.random.seed(1)
W1 = np.random.randn(1, 4) # input: 1 -> hidden: 4
W2 = np.random.randn(4, 2) # hidden: 4 -> output: 2
def relu(x):
    """Element-wise rectified linear unit: max(0, x)."""
    return np.maximum(0, x)

def gcn(A, H, W):
    """One GCN layer: relu(D^{-1/2} (A + I) D^{-1/2} H W).

    A: (n, n) adjacency matrix (self-loops are added here).
    H: (n, f_in) node features.  W: (f_in, f_out) weight matrix.
    Returns the (n, f_out) activated node representations.
    """
    # Fix: build the normalized adjacency from the A argument instead of
    # the module-global A_self, and infer n instead of hard-coding 6.
    n = A.shape[0]
    A_hat = np.asmatrix(A + np.eye(n))
    D = np.array(A_hat.sum(1))
    D_half_norm = np.asmatrix(np.diag(np.power(D, -0.5).flatten()))
    H_new = D_half_norm * A_hat * D_half_norm * H * W
    return relu(H_new)
# Two-layer forward pass on the toy 6-node graph:
# H (6x1) -> hidden H1 (6x4) -> output H2 (6x2).
H1 = gcn(A, H, W1)
H2 = gcn(A, H1, W2)
print(H2)
The adjacency matrix can differ even though two graphs have the same network structure.
Therefore, for a graph-level representation, a readout layer makes the result permutation invariant by applying an MLP.
Task 1: Node classification
Task 2: Edges prediction
Task 3: Graph classification
# !pip install spektral==0.6.0
# !pip install tensorflow==2.2.0
# !pip install keras==2.3.0
import numpy as np
import networkx as nx
from tensorflow.keras.utils import to_categorical
from spektral.layers import GraphConv
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dropout
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
from tensorflow.keras.regularizers import l2
from collections import Counter
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
# The seven paper categories (target classes) of the Cora citation dataset.
classes = ['Case_Based',
'Genetic_Algorithms',
'Neural_Networks',
'Probabilistic_Methods',
'Reinforcement_Learning',
'Rule_Learning',
'Theory']
# Load the preprocessed Cora arrays: encoded labels, node ids, edge list,
# node feature matrix, and the train/val/test split masks.
labels_encoded = np.load('./data_files/cora_labels_encoded.npy')
nodes = np.load('./data_files/cora_nodes.npy')
edge_list = np.load('./data_files/cora_edges.npy')
X = np.load('./data_files/cora_features.npy')
data_mask = np.load('./data_files/cora_mask.npy')
# N nodes, each with an F-dimensional feature vector.
N = X.shape[0]
F = X.shape[1]
print('X shape: ', X.shape)
print('\nNumber of nodes (N): ', N)
print('\nNumber of features (F) of each node: ', F)
print('\nCategories: ', classes)
num_classes = len(classes)
print('\nNumber of classes: ', num_classes)
Because GCN is a semi-supervised method, it can be trained with far less labeled data than fully supervised methods. In this task the training set therefore consists of 20 examples per class, while validation and testing use 500 and 1,000 examples respectively.
# Masks selecting the nodes used by each split (semi-supervised:
# only a small labeled subset participates in training).
train_mask = data_mask[0]
# Mask of nodes used for validation.
val_mask = data_mask[1]
# Mask of nodes used for testing.
test_mask = data_mask[2]
# Fix: `labels` was never defined (NameError) — the loaded array is `labels_encoded`.
print("All Number of Node for Node Classification: ", len(labels_encoded))
print("\n")
# Fix: "Trainig" typo in the printed message.
print("Number of Training Data: ", np.sum(train_mask))
print("\n")
print("Number of Validation Data: ", np.sum(val_mask))
print("\n")
print("Number of Test Data: ", np.sum(test_mask))
# Build the Cora citation graph from the loaded node and edge arrays.
G = nx.Graph(name = 'Cora')
G.add_nodes_from(nodes)
G.add_edges_from(edge_list)
print('Graph info: ', nx.info(G))
# Sparse adjacency matrix in node-insertion order.
A = nx.adjacency_matrix(G)
I = np.eye(A.shape[-1], dtype=A.dtype)
# Add self-loops.  NOTE(review): sparse A + dense I yields a dense
# matrix — acceptable for Cora's size, but worth confirming.
A_self = A + I
# Node degrees of the self-loop graph.
degree = np.array(A_self.sum(1))
# D^{-1/2} as a diagonal matrix.
D_half_norm = np.power(degree, -0.5).flatten()
D = np.diag(D_half_norm)
print('D:\n', D)
# Symmetric normalization: D^{-1/2} (A + I) D^{-1/2}.
DAD = D * A_self * D
print('\nDAD:\n', DAD)
# Cast to float32 for the Keras model.
DAD = np.array(DAD, dtype = np.float32)
X = np.array(X, dtype = np.float32)
channels = 16  # hidden units in the first GraphConv layer
dropout = 0.5  # dropout rate applied before each GraphConv
l2_reg = 5e-4  # L2 weight regularization on the first layer's kernel
learning_rate = 1e-2  # Adam learning rate
epochs = 100  # number of training epochs
es_patience = 10  # early-stopping patience — NOTE(review): no EarlyStopping callback is wired into fit() below; confirm intended
# Two-layer GCN: X -> Dropout -> GraphConv(16, relu) -> Dropout -> GraphConv(num_classes, softmax).
X_in = Input(shape = (F, ))
# Second input carries the pre-normalized filter matrix DAD per node.
fltr_in = Input((N, ), sparse = True)
dropout_1 = Dropout(dropout)(X_in)
graph_conv_1 = GraphConv(channels,
activation = 'relu',
kernel_regularizer = l2(l2_reg),
use_bias = False)([dropout_1, fltr_in])
dropout_2 = Dropout(dropout)(graph_conv_1)
# Output layer: per-node class probabilities.
graph_conv_2 = GraphConv(num_classes,
activation = 'softmax',
use_bias = False)([dropout_2, fltr_in])
model = Model(inputs = [X_in, fltr_in], outputs = graph_conv_2)
optimizer = Adam(lr = learning_rate)
# weighted_metrics (not metrics) so the split masks passed as sample
# weights also restrict the reported accuracy to masked nodes.
model.compile(optimizer = optimizer,
loss = 'categorical_crossentropy',
weighted_metrics = ['acc'])
model.summary()
# Full-batch semi-supervised training: the entire graph is one batch
# (batch_size = N); train_mask zeroes the loss on non-training nodes.
validation_data = ([X, DAD], labels_encoded, val_mask)
model.fit([X, DAD],
labels_encoded,
sample_weight = train_mask,
epochs = epochs,
batch_size = N,
validation_data = validation_data,
# shuffle must stay False: row i of X must stay aligned with row/col i of DAD
shuffle = False,)
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
# Evaluate on the test nodes only.
X_te = X[test_mask]
# NOTE(review): slicing DAD to test rows/cols keeps only test-test edges;
# the usual GCN evaluation feeds the full graph and masks the outputs — confirm intended.
A_te = DAD[test_mask,:][:,test_mask]
y_te = labels_encoded[test_mask]
y_pred = model.predict([X_te, A_te], batch_size = N)
# Compare predicted vs. true class indices (argmax over class axis).
cm = confusion_matrix(np.argmax(y_te, axis = 1), np.argmax(y_pred, axis = 1))
print('Confusion matrix')
print(cm)
# Render the confusion matrix as an annotated heatmap: each cell shows
# its count, in white on dark cells and black on light ones.
plt.figure()
plt.imshow(cm, interpolation = 'nearest', cmap = plt.cm.Blues)
fmt = 'd'
thresh = cm.max() / 2.
for row, col in np.ndindex(cm.shape):
    cell_color = "white" if cm[row, col] > thresh else "black"
    plt.text(col, row, format(cm[row, col], fmt), ha = "center", va = "center", color = cell_color, fontsize = 15)
plt.colorbar()
plt.tight_layout()
plt.show()
# Build a model exposing every intermediate layer activation.
layer_outputs = [layer.output for layer in model.layers]
activation_model = Model(inputs = model.input, outputs = layer_outputs)
activations = activation_model.predict([X,DAD],batch_size = N)
# Project one hidden representation to 2-D with t-SNE
# (activations[3] — presumably a GraphConv output; verify the layer order).
x_tsne = TSNE(n_components = 2).fit_transform(activations[3])
def plot_tSNE(labels_encoded, x_tsne):
    """Scatter-plot the 2-D t-SNE embedding, one color per class.

    labels_encoded: (N, num_classes) encoded label matrix (class = argmax per row).
    x_tsne: (N, 2) t-SNE coordinates.
    """
    color_map = np.argmax(labels_encoded, axis = 1)
    # Fix: derive the class count from the labels instead of relying on
    # the module-level `num_classes` global (removes a hidden dependency).
    n_classes = labels_encoded.shape[1]
    plt.figure(figsize = (10, 10))
    for cl in range(n_classes):
        indices = np.where(color_map == cl)[0]
        plt.scatter(x_tsne[indices, 0], x_tsne[indices, 1], label = cl)
    plt.legend()
    plt.show()
plot_tSNE(labels_encoded, x_tsne)
%%html
<center><iframe src="https://www.youtube.com/embed/fOctJB4kVlM?rel=0"
width="560" height="315" frameborder="0" allowfullscreen></iframe></center>
%%html
<center><iframe src="https://www.youtube.com/embed/ABCGCf8cJOE?rel=0"
width="560" height="315" frameborder="0" allowfullscreen></iframe></center>
%%html
<center><iframe src="https://www.youtube.com/embed/0YLZXjMHA-8?rel=0"
width="560" height="315" frameborder="0" allowfullscreen></iframe></center>
%%html
<center><iframe src="https://www.youtube.com/embed/ex2qllcVneY?rel=0"
width="560" height="315" frameborder="0" allowfullscreen></iframe></center>
%%html
<center><iframe src="https://www.youtube.com/embed/YL1jGgcY78U?rel=0"
width="560" height="315" frameborder="0" allowfullscreen></iframe></center>
%%html
<center><iframe src="https://www.youtube.com/embed/8owQBFAHw7E?rel=0"
width="560" height="315" frameborder="0" allowfullscreen></iframe></center>
%%html
<center><iframe src="https://www.youtube.com/embed/R67-JxtOQzg?rel=0"
width="560" height="315" frameborder="0" allowfullscreen></iframe></center>
University courses:
%%javascript
$.getScript('https://kmahelona.github.io/ipython_notebook_goodies/ipython_notebook_toc.js')